/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.StringTokenizer;

import mosdi.fa.Alphabet;
import mosdi.fa.GeneralizedString;

/**
 * Static helpers for working with IUPAC nucleotide codes: conversion of IUPAC
 * characters and patterns into {@link BitArray}s / {@link GeneralizedString}s
 * over the DNA alphabet, (reverse) complements, parsing of abelian patterns and
 * lookup of character multiplicities.
 *
 * <p>Characters are handled either as {@code char}s or as indices with respect
 * to {@code Alphabet.getIupacAlphabet()}.</p>
 */
public class Iupac {

	/** All IUPAC nucleotide codes understood by this class. */
	private static final String IUPAC_CHARACTERS = "ACGTRYSWKMBDHVN";

	// For each base, the IUPAC codes whose meaning includes that base:
	// A.................Adenine
	// C.................Cytosine
	// G.................Guanine
	// T.................Thymine
	// R.................A or G
	// Y.................C or T
	// S.................G or C
	// W.................A or T
	// K.................G or T
	// M.................A or C
	// B.................C or G or T
	// D.................A or G or T
	// H.................A or C or T
	// V.................A or C or G
	// N.................any base
	// (see http://www.bioinformatics.org/sms2/iupac.html)
	private static final String CODES_CONTAINING_A = "ARWMDHVN";
	private static final String CODES_CONTAINING_C = "CYSMBHVN";
	private static final String CODES_CONTAINING_G = "GRSKBDVN";
	private static final String CODES_CONTAINING_T = "TYWKBDHN";

	// Note: static initializers run in textual order; the two tables below
	// rely on the alphabets (and on each other) being initialized first.
	private static final Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
	private static final Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();
	/** BitArray (over the DNA alphabet) for every index of the IUPAC alphabet. */
	private static final BitArray[] iupacToBitArrayTable = createIupacToBitArrayTable();
	/** Number of bases matched by every index of the IUPAC alphabet. */
	private static final int[] multiplicityTable = createMultiplicityTable();

	/** Converts a IUPAC character into a freshly created BitArray over the given alphabet.
	 *  The bit at the alphabet's index of 'A', 'C', 'G' or 'T' is set if and only if
	 *  the IUPAC code includes that base. Only upper case codes are recognized;
	 *  'U' is not accepted.
	 *  @param alphabet Alphabet defining the size of the result and the indices of
	 *                  'A', 'C', 'G' and 'T'.
	 *  @param c IUPAC character.
	 *  @return New BitArray of length alphabet.size().
	 *  @throws IllegalArgumentException If c is not one of the supported IUPAC codes.
	 */
	public static BitArray iupacCharToBitArray(Alphabet alphabet, char c) {
		BitArray ba = new BitArray(alphabet.size());
		if (CODES_CONTAINING_A.indexOf(c)>=0) {
			ba.set(alphabet.getIndex('A'), true);
		}
		if (CODES_CONTAINING_C.indexOf(c)>=0) {
			ba.set(alphabet.getIndex('C'), true);
		}
		if (CODES_CONTAINING_G.indexOf(c)>=0) {
			ba.set(alphabet.getIndex('G'), true);
		}
		if (CODES_CONTAINING_T.indexOf(c)>=0) {
			ba.set(alphabet.getIndex('T'), true);
		}
		// no bit set means that c matched none of the known codes
		if (ba.allZero()) {
			throw new IllegalArgumentException("Invalid IUPAC-String: illegal character \""+c+"\"");
		}
		return ba;
	}

	/** Converts a IUPAC pattern into one or two generalized strings over the DNA alphabet.
	 *  @param iupacPattern Pattern given as string of IUPAC characters.
	 *  @param considerReverse If true, the reverse complement of the pattern is
	 *                         appended as second element.
	 *  @return List containing the pattern and, if requested, its reverse complement.
	 */
	public static List<GeneralizedString> toGeneralizedStrings(String iupacPattern, boolean considerReverse) {
		List<GeneralizedString> result = new ArrayList<GeneralizedString>(2);
		result.add(toGeneralizedString(iupacPattern));
		if (considerReverse) {
			result.add(toGeneralizedString(reverseComplementary(iupacPattern)));
		}
		return result;
	}

	/** Converts a IUPAC pattern into one or two generalized strings over the DNA alphabet.
	 *  @param iupacPattern Pattern given as indices w.r.t. Alphabet.getIupacAlphabet().
	 *  @param considerReverse If true, the reverse complement of the pattern is
	 *                         appended as second element.
	 *  @return List containing the pattern and, if requested, its reverse complement.
	 */
	public static List<GeneralizedString> toGeneralizedStrings(int[] iupacPattern, boolean considerReverse) {
		List<GeneralizedString> result = new ArrayList<GeneralizedString>(2);
		result.add(toGeneralizedString(iupacPattern));
		if (considerReverse) {
			result.add(toGeneralizedString(reverseComplementary(iupacPattern)));
		}
		return result;
	}

	/** Converts a IUPAC character into a BitArray over the DNA alphabet.
	 *  The returned object is the instance stored in the internal lookup table,
	 *  i.e. it is shared between all callers and must not be modified.
	 *  @param iupacChar Index of a IUPAC character according to Alphabet.getIupacAlphabet().
	 *  @return BitArray of length 4 according to Alphabet.getDNAAlphabet().
	 */
	public static BitArray iupacCharToBitArray(int iupacChar) {
		return iupacToBitArrayTable[iupacChar];
	}

	/** Converts all IUPAC character into BitArrays over the DNA alphabet.
	 *  The returned array is a copy, but its elements are the instances stored in
	 *  the internal lookup table and must not be modified.
	 *  @return BitArrays of length 4 according to Alphabet.getDNAAlphabet().
	 */
	public static BitArray[] asGeneralizedAlphabet() {
		return Arrays.copyOf(iupacToBitArrayTable, iupacToBitArrayTable.length);
	}

	/** Determines whether char1 is a subset of char2. The characters are given
	 *  w.r.t. Alphabet.getIupacAlphabet(). */
	public static boolean subsetOf(int char1, int char2) {
		return iupacCharToBitArray(char1).subsetOf(iupacCharToBitArray(char2));
	}

	/** Builds the lookup table mapping each index of the IUPAC alphabet to the
	 *  corresponding BitArray over the DNA alphabet. */
	private static BitArray[] createIupacToBitArrayTable() {
		BitArray[] result = new BitArray[iupacAlphabet.size()];
		for (int c=0; c<result.length; ++c) {
			result[c] = iupacCharToBitArray(dnaAlphabet, iupacAlphabet.get(c));
		}
		return result;
	}

	/** Builds the lookup table mapping each index of the IUPAC alphabet to the
	 *  number of bases the respective character stands for. */
	private static int[] createMultiplicityTable() {
		int[] result = new int[iupacToBitArrayTable.length];
		for (int c=0; c<result.length; ++c) {
			result[c] = iupacToBitArrayTable[c].numberOfOnes();
		}
		return result;
	}

	/** Returned result is a generalized string over Alphabet.getDnaAlphabet().
	 *  The positions refer to the shared instances of the internal lookup table.
	 *  @param iupacString Pattern given as indices w.r.t. Alphabet.getIupacAlphabet().
	 */
	public static GeneralizedString toGeneralizedString(int[] iupacString) {
		BitArray[] positions = new BitArray[iupacString.length];
		for (int i=0; i<iupacString.length; ++i) {
			positions[i] = iupacToBitArrayTable[iupacString[i]];
		}
		return new GeneralizedString(dnaAlphabet, positions);
	}

	/** Returned result is a generalized string over Alphabet.getDnaAlphabet().
	 *  @param iupacString Pattern given as string of IUPAC characters.
	 *  @throws IllegalArgumentException If the string contains a character that
	 *                                   is not a supported IUPAC code.
	 */
	public static GeneralizedString toGeneralizedString(String iupacString) {
		BitArray[] positions = new BitArray[iupacString.length()];
		for (int i=0; i<positions.length; ++i) {
			positions[i] = iupacCharToBitArray(dnaAlphabet, iupacString.charAt(i));
		}
		return new GeneralizedString(dnaAlphabet, positions);
	}

	/** Returns the index of the complementary IUPAC character.
	 *  NOTE(review): the hard-coded mapping is consistent with an alphabetically
	 *  sorted IUPAC alphabet (A,B,C,D,G,H,K,M,N,R,S,T,V,W,Y); presumably this is
	 *  the order used by Alphabet.getIupacAlphabet() -- confirm before changing
	 *  either of them.
	 *  @param c Character index in the range 0..14.
	 *  @throws IllegalArgumentException If c is outside the range 0..14.
	 */
	public static int complementary(int c) {
		switch (c) {
		case  0: return 11;
		case  1: return 12;
		case  2: return 4;
		case  3: return 5;
		case  4: return 2;
		case  5: return 3;
		case  6: return 7;
		case  7: return 6;
		case  8: return 8;
		case  9: return 14;
		case 10: return 10;
		case 11: return 0;
		case 12: return 1;
		case 13: return 13;
		case 14: return 9;
		default: throw new IllegalArgumentException("Invalid IUPAC character index: "+c);
		}
	}

	/** Returns the complementary IUPAC character, i.e. the code describing the
	 *  complements of all bases described by c. Only upper case codes are recognized.
	 *  @throws IllegalArgumentException If c is not a supported IUPAC code.
	 */
	public static char complementary(char c) {
		switch (c) {
		case 'A': return 'T';
		case 'C': return 'G';
		case 'G': return 'C';
		case 'T': return 'A';
		case 'R': return 'Y';
		case 'Y': return 'R';
		case 'S': return 'S';
		case 'W': return 'W';
		case 'K': return 'M';
		case 'M': return 'K';
		case 'B': return 'V';
		case 'D': return 'H';
		case 'H': return 'D';
		case 'V': return 'B';
		case 'N': return 'N';
		default: throw new IllegalArgumentException("Invalid IUPAC character: \""+c+"\"");
		}
	}

	/** Transforms a IUPAC String into its reverse complementary. */
	public static String reverseComplementary(String forward) {
		StringBuilder sb = new StringBuilder(forward.length());
		for (int i=forward.length()-1; i>=0; --i) {
			sb.append(complementary(forward.charAt(i)));
		}
		return sb.toString();
	}

	/** Transforms a IUPAC pattern, given as character indices, into its reverse
	 *  complementary. The input array is left unchanged. */
	public static int[] reverseComplementary(int[] forward) {
		int[] reverse = new int[forward.length];
		int j = 0;
		for (int i=forward.length-1; i>=0; --i) {
			reverse[j++] = complementary(forward[i]);
		}
		return reverse;
	}

	/** Transforms a IUPAC pattern, given as character indices, into its
	 *  (non-reversed) complementary. The input array is left unchanged. */
	public static int[] complementary(int[] pattern) {
		int[] complement = new int[pattern.length];
		for (int i=0; i<pattern.length; ++i) {
			complement[i] = complementary(pattern[i]);
		}
		return complement;
	}

	/** Parses an abelian pattern, i.e. a sequence of IUPAC characters each
	 *  accompanied by a frequency, for example "A2C3".
	 *  Characters and frequencies are tokenized independently and paired in
	 *  order of appearance: the i-th run of non-digits must be a single
	 *  character and is assigned the i-th run of non-IUPAC-characters as
	 *  frequency. If a character occurs more than once, the last frequency wins.
	 *  @param s String to be parsed.
	 *  @return Array indexed by Alphabet.getIupacAlphabet() containing the
	 *          frequency of each character (0 for characters not mentioned), or
	 *          null if a character token is not exactly one character long or
	 *          if there are fewer frequencies than characters.
	 *  @throws NumberFormatException If a frequency token cannot be parsed as
	 *          int, e.g. because s contains characters that are neither digits
	 *          nor upper case IUPAC codes.
	 */
	public static int[] parseAbelianPattern(String s) {
		StringTokenizer stChars = new StringTokenizer(s,"0123456789",false);
		StringTokenizer stFreq = new StringTokenizer(s,IUPAC_CHARACTERS,false);
		int[] result = new int[iupacAlphabet.size()];
		while (stChars.hasMoreTokens()) {
			String token = stChars.nextToken();
			if (token.length()!=1) return null;
			if (!stFreq.hasMoreTokens()) return null;
			int freq = Integer.parseInt(stFreq.nextToken());
			result[iupacAlphabet.getIndex(token.charAt(0))]=freq;
		}
		return result;
	}

	/** Returns the number of bases a IUPAC character stands for, e.g. the
	 *  number of bits set in the corresponding BitArray over the DNA alphabet.
	 *  @param charIndex Character index w.r.t. Alphabet.getIupacAlphabet().
	 *  @throws ArrayIndexOutOfBoundsException If charIndex is not a valid index.
	 */
	public static int getCharacterMultiplicity(int charIndex) {
		return multiplicityTable[charIndex];
	}

	/** Returns the number of bases a IUPAC character stands for.
	 *  @param c IUPAC character.
	 *  @throws IllegalArgumentException If the index reported for c by the IUPAC
	 *          alphabet lies outside the multiplicity table (presumably the
	 *          case for characters unknown to the alphabet -- verify against
	 *          Alphabet.getIndex).
	 */
	public static int getCharacterMultiplicity(char c) {
		try {
			return getCharacterMultiplicity(iupacAlphabet.getIndex(c));
		} catch (ArrayIndexOutOfBoundsException e) {
			// keep the original failure attached for diagnosis
			throw new IllegalArgumentException("Invalid IUPAC character: \""+c+"\"", e);
		}
	}

}
